Loading dataset
# Read the raw telecom data from disk. read.csv() already returns a
# data.frame, so wrapping it in data.frame() only made a redundant copy.
Telecom_Data <- read.csv("Telecom Data.csv")
# Number of columns in the dataset.
ncol(Telecom_Data)
## [1] 58
# Number of rows in the dataset.
nrow(Telecom_Data)
## [1] 51047
There are a total of 58 columns and 51,047 rows.
Converting a few columns to factors
# Re-encode the categorical columns Churn, CreditRating and Occupation as
# factors in a single pass over their names.
factor_cols <- c("Churn", "CreditRating", "Occupation")
Telecom_Data[factor_cols] <- lapply(Telecom_Data[factor_cols], factor)
Let’s check for missing (NA) values
library(dplyr)
library(tidyr)
## Counting the missing (NA) values in each column of the dataset
#summary(Telecom_Data)
#is.null(Telecom_Data)
# vapply() pins the result to exactly one integer per column, whereas
# sapply() silently changes its return type on empty or ragged input.
null_values <- vapply(Telecom_Data, function(x) sum(is.na(x)), integer(1))
# Print the named vector of per-column NA counts.
null_values
## CustomerID Churn MonthlyRevenue
## 0 0 156
## MonthlyMinutes TotalRecurringCharge DirectorAssistedCalls
## 156 156 156
## OverageMinutes RoamingCalls PercChangeMinutes
## 156 156 367
## PercChangeRevenues DroppedCalls BlockedCalls
## 367 0 0
## UnansweredCalls CustomerCareCalls ThreewayCalls
## 0 0 0
## ReceivedCalls OutboundCalls InboundCalls
## 0 0 0
## PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls
## 0 0 0
## CallForwardingCalls CallWaitingCalls MonthsInService
## 0 0 0
## UniqueSubs ActiveSubs ServiceArea
## 0 0 0
## Handsets HandsetModels CurrentEquipmentDays
## 1 1 1
## AgeHH1 AgeHH2 ChildrenInHH
## 909 909 0
## HandsetRefurbished HandsetWebCapable TruckOwner
## 0 0 0
## RVOwner Homeownership BuysViaMailOrder
## 0 0 0
## RespondsToMailOffers OptOutMailings NonUSTravel
## 0 0 0
## OwnsComputer HasCreditCard RetentionCalls
## 0 0 0
## RetentionOffersAccepted NewCellphoneUser NotNewCellphoneUser
## 0 0 0
## ReferralsMadeBySubscriber IncomeGroup OwnsMotorcycle
## 0 0 0
## AdjustmentsToCreditRating HandsetPrice MadeCallToRetentionTeam
## 0 0 0
## CreditRating PrizmCode Occupation
## 0 0 0
## MaritalStatus
## 0
A few columns have missing values, but the counts are small.
Let's create new variables that will help in the analysis
## Creation of new variables for our analysis

# Express `numerator` as a percentage of `denominator`, element-wise.
# A zero denominator yields Inf (x / 0) or NaN (0 / 0); those results are
# replaced with NA so that summaries such as mean() are not dragged to Inf.
percent_of <- function(numerator, denominator) {
  pct <- (numerator / denominator) * 100
  pct[!is.finite(pct)] <- NA_real_
  pct
}

# Recurring charge as a percentage of monthly revenue.
Telecom_Data$perc_recurrent_charge <- percent_of(
  Telecom_Data$TotalRecurringCharge,
  Telecom_Data$MonthlyRevenue
)
# Overage minutes as a percentage of monthly minutes.
Telecom_Data$perc_overage_minute <- percent_of(
  Telecom_Data$OverageMinutes,
  Telecom_Data$MonthlyMinutes
)
# Inspect the structure of the data, including the two new columns.
str(Telecom_Data)
## 'data.frame': 51047 obs. of 60 variables:
## $ CustomerID : int 3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
## $ Churn : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 1 1 1 ...
## $ MonthlyRevenue : num 24 17 38 82.3 17.1 ...
## $ MonthlyMinutes : int 219 10 8 1312 0 682 26 98 24 1056 ...
## $ TotalRecurringCharge : int 22 17 38 75 17 52 30 66 35 75 ...
## $ DirectorAssistedCalls : num 0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
## $ OverageMinutes : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RoamingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PercChangeMinutes : int -157 -4 -2 157 0 148 60 24 20 43 ...
## $ PercChangeRevenues : num -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
## $ DroppedCalls : num 0.7 0.3 0 52 0 9 0 0 0 0 ...
## $ BlockedCalls : num 0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
## $ UnansweredCalls : num 6.3 2.7 0 76 0 13 2.3 4 1 0 ...
## $ CustomerCareCalls : num 0 0 0 4.3 0 0.7 0 4 0 0 ...
## $ ThreewayCalls : num 0 0 0 1.3 0 0 0 0 0 0 ...
## $ ReceivedCalls : num 97.2 0 0.4 200.3 0 ...
## $ OutboundCalls : num 0 0 0.3 370.3 0 ...
## $ InboundCalls : num 0 0 0 147 0 0 0 0 1.7 0 ...
## $ PeakCallsInOut : num 58 5 1.3 555.7 0 ...
## $ OffPeakCallsInOut : num 24 1 3.7 303.7 0 ...
## $ DroppedBlockedCalls : num 1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0.3 0 0 22.7 0 0.7 0 0 0 0 ...
## $ MonthsInService : int 61 58 60 59 53 53 57 59 53 55 ...
## $ UniqueSubs : int 2 1 1 2 2 1 2 2 3 1 ...
## $ ActiveSubs : int 1 1 1 2 2 1 2 2 3 1 ...
## $ ServiceArea : chr "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
## $ Handsets : int 2 2 1 9 4 3 2 3 4 9 ...
## $ HandsetModels : int 2 1 1 4 3 2 2 3 3 5 ...
## $ CurrentEquipmentDays : int 361 1504 1812 458 852 231 601 464 544 388 ...
## $ AgeHH1 : int 62 40 26 30 46 28 52 46 36 46 ...
## $ AgeHH2 : int 0 42 26 0 54 0 58 46 34 68 ...
## $ ChildrenInHH : chr "No" "Yes" "Yes" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "Yes" "No" "No" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "No" ...
## $ RVOwner : chr "No" "No" "No" "No" ...
## $ Homeownership : chr "Known" "Known" "Unknown" "Known" ...
## $ BuysViaMailOrder : chr "Yes" "Yes" "No" "Yes" ...
## $ RespondsToMailOffers : chr "Yes" "Yes" "No" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "No" "No" ...
## $ OwnsComputer : chr "Yes" "Yes" "No" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 1 0 0 0 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "No" "Yes" "Yes" "Yes" ...
## $ NotNewCellphoneUser : chr "No" "No" "No" "No" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 4 5 6 6 9 1 9 6 9 5 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 0 0 1 1 1 0 0 1 ...
## $ HandsetPrice : chr "30" "30" "Unknown" "10" ...
## $ MadeCallToRetentionTeam : chr "Yes" "No" "No" "No" ...
## $ CreditRating : Factor w/ 7 levels "1-Highest","2-High",..: 1 4 3 4 1 3 1 1 1 3 ...
## $ PrizmCode : chr "Suburban" "Suburban" "Town" "Other" ...
## $ Occupation : Factor w/ 8 levels "Clerical","Crafts",..: 5 5 2 4 5 4 7 5 4 5 ...
## $ MaritalStatus : chr "No" "Yes" "Yes" "No" ...
## $ perc_recurrent_charge : num 91.7 100.1 100 91.2 99.2 ...
## $ perc_overage_minute : num 0 0 0 0 NaN 0 0 0 0 0 ...
Let's calculate the churn rate
## Tally the customers in each Churn level, most frequent level first
churn_counts <- Telecom_Data %>%
  dplyr::count(Churn, sort = TRUE)
Let's plot the churn rate using a pie chart
library("ggplot2")
# Pie chart of the churn counts: one stacked bar whose y (count) axis is
# mapped to the angle of a polar coordinate system.
ggplot(churn_counts, aes(x = "", y = n, fill = Churn)) +
  geom_col() +
  coord_polar(theta = "y")
Trying out plotly to draw a more interactive pie chart
library(plotly)
# Slice colours for the pie chart, given as CSS rgb() strings.
colors <- c(
  'rgb(211,94,96)',
  'rgb(128,133,133)',
  'rgb(144,103,167)',
  'rgb(171,104,87)',
  'rgb(114,147,203)'
)
# Interactive pie chart of the churn counts; each slice shows its label and
# its percentage share, and slices are separated by a thin white outline.
fig <- plot_ly(
  type = 'pie',
  labels = churn_counts$Churn,
  values = churn_counts$n,
  textinfo = 'label+percent',
  insidetextorientation = 'radial',
  marker = list(
    colors = colors,
    line = list(color = '#FFFFFF', width = 1)
  )
)
# Print the figure so it is rendered.
fig
Subsetting the data by churn status for in-depth analysis, and checking the summary of each subset to analyze the trends
library(dplyr)
# Split the customers into two data frames by churn status.
Telecom_Data_yes <- Telecom_Data %>% dplyr::filter(Churn == "Yes")
Telecom_Data_no <- Telecom_Data %>% dplyr::filter(Churn == "No")
# Per-column summary statistics for the churned customers.
summary(Telecom_Data_yes)
## CustomerID Churn MonthlyRevenue MonthlyMinutes
## Min. :3000002 No : 0 Min. : 0 Min. : 0
## 1st Qu.:3099298 Yes:14711 1st Qu.: 33 1st Qu.: 132
## Median :3195614 Median : 48 Median : 330
## Mean :3194322 Mean : 58 Mean : 484
## 3rd Qu.:3286308 3rd Qu.: 70 3rd Qu.: 667
## Max. :3399978 Max. :861 Max. :5410
## NA's :70 NA's :70
## TotalRecurringCharge DirectorAssistedCalls OverageMinutes RoamingCalls
## Min. :-11 Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.: 30 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0
## Median : 44 Median : 0.2 Median : 4 Median : 0
## Mean : 45 Mean : 0.8 Mean : 43 Mean : 1
## 3rd Qu.: 55 3rd Qu.: 0.7 3rd Qu.: 46 3rd Qu.: 0
## Max. :338 Max. :45.8 Max. :2018 Max. :851
## NA's :70 NA's :70 NA's :70 NA's :70
## PercChangeMinutes PercChangeRevenues DroppedCalls BlockedCalls
## Min. :-2868 Min. :-851 Min. : 0.0 Min. : 0.0
## 1st Qu.: -101 1st Qu.: -8 1st Qu.: 0.7 1st Qu.: 0.0
## Median : -11 Median : 0 Median : 3.0 Median : 1.0
## Mean : -25 Mean : 0 Mean : 5.8 Mean : 4.0
## 3rd Qu.: 54 3rd Qu.: 2 3rd Qu.: 7.3 3rd Qu.: 3.3
## Max. : 5192 Max. :2484 Max. :208.7 Max. :314.7
## NA's :208 NA's :208
## UnansweredCalls CustomerCareCalls ThreewayCalls ReceivedCalls OutboundCalls
## Min. : 0 Min. : 0.0 Min. : 0.00 Min. : 0 Min. : 0
## 1st Qu.: 4 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 6 1st Qu.: 2
## Median : 15 Median : 0.0 Median : 0.00 Median : 45 Median : 12
## Mean : 26 Mean : 1.6 Mean : 0.26 Mean : 105 Mean : 24
## 3rd Qu.: 34 3rd Qu.: 1.3 3rd Qu.: 0.30 3rd Qu.: 140 3rd Qu.: 32
## Max. :849 Max. :172.3 Max. :30.00 Max. :2619 Max. :520
##
## InboundCalls PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls
## Min. : 0.0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 19 1st Qu.: 9 1st Qu.: 2
## Median : 1.7 Median : 58 Median : 31 Median : 5
## Mean : 7.3 Mean : 84 Mean : 62 Mean : 10
## 3rd Qu.: 8.0 3rd Qu.: 114 3rd Qu.: 80 3rd Qu.: 12
## Max. :298.3 Max. :1359 Max. :1314 Max. :329
##
## CallForwardingCalls CallWaitingCalls MonthsInService UniqueSubs
## Min. : 0.0 Min. : 0.0 Min. : 6 Min. : 1.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.:12 1st Qu.: 1.0
## Median : 0.0 Median : 0.0 Median :17 Median : 1.0
## Mean : 0.0 Mean : 1.6 Mean :19 Mean : 1.6
## 3rd Qu.: 0.0 3rd Qu.: 1.3 3rd Qu.:24 3rd Qu.: 2.0
## Max. :33.7 Max. :135.7 Max. :61 Max. :196.0
##
## ActiveSubs ServiceArea Handsets HandsetModels
## Min. : 0.0 Length:14711 Min. : 1.00 Min. : 1.0
## 1st Qu.: 1.0 Class :character 1st Qu.: 1.00 1st Qu.: 1.0
## Median : 1.0 Mode :character Median : 1.00 Median : 1.0
## Mean : 1.4 Mean : 1.74 Mean : 1.5
## 3rd Qu.: 2.0 3rd Qu.: 2.00 3rd Qu.: 2.0
## Max. :53.0 Max. :22.00 Max. :14.0
##
## CurrentEquipmentDays AgeHH1 AgeHH2 ChildrenInHH
## Min. : -4 Min. : 0.0 Min. : 0.0 Length:14711
## 1st Qu.: 249 1st Qu.: 0.0 1st Qu.: 0.0 Class :character
## Median : 366 Median :34.0 Median : 0.0 Mode :character
## Mean : 422 Mean :30.3 Mean :20.4
## 3rd Qu.: 564 3rd Qu.:48.0 3rd Qu.:42.0
## Max. :1779 Max. :98.0 Max. :99.0
## NA's :249 NA's :249
## HandsetRefurbished HandsetWebCapable TruckOwner RVOwner
## Length:14711 Length:14711 Length:14711 Length:14711
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Homeownership BuysViaMailOrder RespondsToMailOffers OptOutMailings
## Length:14711 Length:14711 Length:14711 Length:14711
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## NonUSTravel OwnsComputer HasCreditCard RetentionCalls
## Length:14711 Length:14711 Length:14711 Min. :0.00
## Class :character Class :character Class :character 1st Qu.:0.00
## Mode :character Mode :character Mode :character Median :0.00
## Mean :0.06
## 3rd Qu.:0.00
## Max. :4.00
##
## RetentionOffersAccepted NewCellphoneUser NotNewCellphoneUser
## Min. :0.000 Length:14711 Length:14711
## 1st Qu.:0.000 Class :character Class :character
## Median :0.000 Mode :character Mode :character
## Mean :0.026
## 3rd Qu.:0.000
## Max. :3.000
##
## ReferralsMadeBySubscriber IncomeGroup OwnsMotorcycle
## Min. :0.00 Min. :0.00 Length:14711
## 1st Qu.:0.00 1st Qu.:0.00 Class :character
## Median :0.00 Median :5.00 Mode :character
## Mean :0.05 Mean :4.26
## 3rd Qu.:0.00 3rd Qu.:7.00
## Max. :9.00 Max. :9.00
##
## AdjustmentsToCreditRating HandsetPrice MadeCallToRetentionTeam
## Min. :0.00 Length:14711 Length:14711
## 1st Qu.:0.00 Class :character Class :character
## Median :0.00 Mode :character Mode :character
## Mean :0.04
## 3rd Qu.:0.00
## Max. :9.00
##
## CreditRating PrizmCode Occupation MaritalStatus
## 1-Highest:2628 Length:14711 Other :10932 Length:14711
## 2-High :5712 Class :character Professional: 2467 Class :character
## 3-Good :2608 Mode :character Crafts : 426 Mode :character
## 4-Medium :1399 Clerical : 289
## 5-Low :1436 Self : 243
## 6-VeryLow: 316 Retired : 185
## 7-Lowest : 612 (Other) : 169
## perc_recurrent_charge perc_overage_minute
## Min. :-30 Min. : 0
## 1st Qu.: 69 1st Qu.: 0
## Median : 94 Median : 1
## Mean : 88 Mean : 7
## 3rd Qu.:106 3rd Qu.: 10
## Max. :514 Max. :100
## NA's :72 NA's :512
# Per-column summary statistics for the subset filtered on Churn == "No".
summary(Telecom_Data_no)
## CustomerID Churn MonthlyRevenue MonthlyMinutes
## Min. :3000014 No :36336 Min. : -6 Min. : 0
## 1st Qu.:3101025 Yes: 0 1st Qu.: 34 1st Qu.: 170
## Median :3204388 Median : 49 Median : 381
## Mean :3205048 Mean : 59 Mean : 543
## 3rd Qu.:3313601 3rd Qu.: 72 3rd Qu.: 743
## Max. :3399994 Max. :1223 Max. :7359
## NA's :86 NA's :86
## TotalRecurringCharge DirectorAssistedCalls OverageMinutes RoamingCalls
## Min. : -9 Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.: 30 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0
## Median : 45 Median : 0.2 Median : 2 Median : 0
## Mean : 48 Mean : 0.9 Mean : 39 Mean : 1
## 3rd Qu.: 60 3rd Qu.: 1.0 3rd Qu.: 39 3rd Qu.: 0
## Max. :400 Max. :159.4 Max. :4321 Max. :1112
## NA's :86 NA's :86 NA's :86 NA's :86
## PercChangeMinutes PercChangeRevenues DroppedCalls BlockedCalls
## Min. :-3875 Min. :-1108 Min. : 0.0 Min. : 0
## 1st Qu.: -78 1st Qu.: -7 1st Qu.: 1.0 1st Qu.: 0
## Median : -3 Median : 0 Median : 3.0 Median : 1
## Mean : -6 Mean : -1 Mean : 6.1 Mean : 4
## 3rd Qu.: 70 3rd Qu.: 2 3rd Qu.: 7.7 3rd Qu.: 4
## Max. : 4480 Max. : 1347 Max. :221.7 Max. :384
## NA's :159 NA's :159
## UnansweredCalls CustomerCareCalls ThreewayCalls ReceivedCalls OutboundCalls
## Min. : 0 Min. : 0 Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.: 6 1st Qu.: 0 1st Qu.: 0.0 1st Qu.: 10 1st Qu.: 4
## Median : 17 Median : 0 Median : 0.0 Median : 56 Median : 14
## Mean : 29 Mean : 2 Mean : 0.3 Mean : 119 Mean : 26
## 3rd Qu.: 37 3rd Qu.: 2 3rd Qu.: 0.3 3rd Qu.: 159 3rd Qu.: 35
## Max. :840 Max. :327 Max. :66.0 Max. :2692 Max. :644
##
## InboundCalls PeakCallsInOut OffPeakCallsInOut DroppedBlockedCalls
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 25 1st Qu.: 12 1st Qu.: 2
## Median : 2 Median : 64 Median : 38 Median : 6
## Mean : 9 Mean : 93 Mean : 70 Mean : 10
## 3rd Qu.: 10 3rd Qu.: 124 3rd Qu.: 92 3rd Qu.: 13
## Max. :519 Max. :2091 Max. :1475 Max. :412
##
## CallForwardingCalls CallWaitingCalls MonthsInService UniqueSubs
## Min. : 0.0 Min. : 0.0 Min. : 6.0 Min. : 1.00
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.:11.0 1st Qu.: 1.00
## Median : 0.0 Median : 0.3 Median :16.0 Median : 1.00
## Mean : 0.0 Mean : 1.9 Mean :18.6 Mean : 1.51
## 3rd Qu.: 0.0 3rd Qu.: 1.7 3rd Qu.:24.0 3rd Qu.: 2.00
## Max. :81.3 Max. :212.7 Max. :60.0 Max. :12.00
##
## ActiveSubs ServiceArea Handsets HandsetModels
## Min. : 0.00 Length:36336 Min. : 1.00 Min. : 1.00
## 1st Qu.: 1.00 Class :character 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 1.00 Mode :character Median : 1.00 Median : 1.00
## Mean : 1.35 Mean : 1.83 Mean : 1.58
## 3rd Qu.: 2.00 3rd Qu.: 2.00 3rd Qu.: 2.00
## Max. :11.00 Max. :24.00 Max. :15.00
## NA's :1 NA's :1
## CurrentEquipmentDays AgeHH1 AgeHH2 ChildrenInHH
## Min. : -5 Min. : 0 Min. : 0 Length:36336
## 1st Qu.: 197 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 310 Median :36 Median : 0 Mode :character
## Mean : 364 Mean :32 Mean :21
## 3rd Qu.: 493 3rd Qu.:48 3rd Qu.:44
## Max. :1812 Max. :99 Max. :98
## NA's :1 NA's :660 NA's :660
## HandsetRefurbished HandsetWebCapable TruckOwner RVOwner
## Length:36336 Length:36336 Length:36336 Length:36336
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Homeownership BuysViaMailOrder RespondsToMailOffers OptOutMailings
## Length:36336 Length:36336 Length:36336 Length:36336
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## NonUSTravel OwnsComputer HasCreditCard RetentionCalls
## Length:36336 Length:36336 Length:36336 Min. :0.000
## Class :character Class :character Class :character 1st Qu.:0.000
## Mode :character Mode :character Mode :character Median :0.000
## Mean :0.029
## 3rd Qu.:0.000
## Max. :3.000
##
## RetentionOffersAccepted NewCellphoneUser NotNewCellphoneUser
## Min. :0.000 Length:36336 Length:36336
## 1st Qu.:0.000 Class :character Class :character
## Median :0.000 Mode :character Mode :character
## Mean :0.015
## 3rd Qu.:0.000
## Max. :3.000
##
## ReferralsMadeBySubscriber IncomeGroup OwnsMotorcycle
## Min. : 0.0 Min. :0.00 Length:36336
## 1st Qu.: 0.0 1st Qu.:1.00 Class :character
## Median : 0.0 Median :5.00 Mode :character
## Mean : 0.1 Mean :4.35
## 3rd Qu.: 0.0 3rd Qu.:7.00
## Max. :35.0 Max. :9.00
##
## AdjustmentsToCreditRating HandsetPrice MadeCallToRetentionTeam
## Min. : 0.00 Length:36336 Length:36336
## 1st Qu.: 0.00 Class :character Class :character
## Median : 0.00 Mode :character Mode :character
## Mean : 0.06
## 3rd Qu.: 0.00
## Max. :25.00
##
## CreditRating PrizmCode Occupation MaritalStatus
## 1-Highest: 5894 Length:36336 Other :26705 Length:36336
## 2-High :13281 Class :character Professional: 6288 Class :character
## 3-Good : 5802 Mode :character Crafts : 1093 Mode :character
## 4-Medium : 3958 Clerical : 697
## 5-Low : 5063 Self : 636
## 6-VeryLow: 836 Retired : 548
## 7-Lowest : 1502 (Other) : 369
## perc_recurrent_charge perc_overage_minute
## Min. :-28.1 Min. : 0
## 1st Qu.: 74.0 1st Qu.: 0
## Median : 96.1 Median : 1
## Mean : Inf Mean : 6
## 3rd Qu.:110.4 3rd Qu.: 8
## Max. : Inf Max. :100
## NA's :89 NA's :367
Data Classification
# Load the table of variable types and their counts.
feat_typ_counts <- read.csv("Feat_type_counts.csv")
#install.packages("plotrix")
library(plotrix)
library("ggplot2")
#pie(feat_typ_counts$Counts, feat_typ_counts$Variable.Type)
# Share of each variable type as a percentage rounded to one decimal.
piepercent <- round(100 * feat_typ_counts$Counts / sum(feat_typ_counts$Counts), 1)
# Share of each variable type as a fraction of the total.
feat_typ_counts[["fraction"]] <- feat_typ_counts$Counts / sum(feat_typ_counts$Counts)
# Top edge of each ring segment: the running total of the fractions.
feat_typ_counts[["ymax"]] <- cumsum(feat_typ_counts$fraction)
# Bottom edge of each segment: the previous segment's top, starting from 0.
feat_typ_counts[["ymin"]] <- c(
  0,
  feat_typ_counts$ymax[-length(feat_typ_counts$ymax)]
)
# Labels sit at the midpoint of each segment.
feat_typ_counts[["labelPosition"]] <-
  (feat_typ_counts$ymax + feat_typ_counts$ymin) / 2
# Label text: the variable type followed by its count on a second line.
feat_typ_counts[["label"]] <- paste0(
  feat_typ_counts$Variable.Type,
  "\n Count: ",
  feat_typ_counts$Counts
)
# Donut chart: rectangles between x = 3 and x = 4 are wrapped around a polar
# y axis, and the x limits starting at 2 leave the hole in the middle.
ggplot(
  feat_typ_counts,
  aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = Variable.Type)
) +
  geom_rect() +
  geom_label(x = 3.5, aes(y = labelPosition, label = label), size = 2) +
  scale_fill_brewer(palette = 4) +
  coord_polar(theta = "y") +
  xlim(2, 4) +
  theme_void() +
  theme(legend.position = "none")
Getting summary of the data
# Render a summary table covering every column of Telecom_Data.
# NOTE(review): xkablesummary() is neither defined nor loaded in the visible
# code - presumably it comes from a package attached elsewhere; confirm.
xkablesummary(Telecom_Data)
| CustomerID | Churn | MonthlyRevenue | MonthlyMinutes | TotalRecurringCharge | DirectorAssistedCalls | OverageMinutes | RoamingCalls | PercChangeMinutes | PercChangeRevenues | DroppedCalls | BlockedCalls | UnansweredCalls | CustomerCareCalls | ThreewayCalls | ReceivedCalls | OutboundCalls | InboundCalls | PeakCallsInOut | OffPeakCallsInOut | DroppedBlockedCalls | CallForwardingCalls | CallWaitingCalls | MonthsInService | UniqueSubs | ActiveSubs | ServiceArea | Handsets | HandsetModels | CurrentEquipmentDays | AgeHH1 | AgeHH2 | ChildrenInHH | HandsetRefurbished | HandsetWebCapable | TruckOwner | RVOwner | Homeownership | BuysViaMailOrder | RespondsToMailOffers | OptOutMailings | NonUSTravel | OwnsComputer | HasCreditCard | RetentionCalls | RetentionOffersAccepted | NewCellphoneUser | NotNewCellphoneUser | ReferralsMadeBySubscriber | IncomeGroup | OwnsMotorcycle | AdjustmentsToCreditRating | HandsetPrice | MadeCallToRetentionTeam | CreditRating | PrizmCode | Occupation | MaritalStatus | perc_recurrent_charge | perc_overage_minute | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Min | Min. :3000002 | No :36336 | Min. : -6 | Min. : 0 | Min. :-11 | Min. : 0.0 | Min. : 0 | Min. : 0 | Min. :-3875 | Min. :-1108 | Min. : 0.0 | Min. : 0 | Min. : 0 | Min. : 0 | Min. : 0.0 | Min. : 0 | Min. : 0 | Min. : 0 | Min. : 0 | Min. : 0 | Min. : 0 | Min. : 0.0 | Min. : 0.0 | Min. : 6.0 | Min. : 1.0 | Min. : 0.0 | Length:51047 | Min. : 1.00 | Min. : 1.00 | Min. : -5 | Min. : 0 | Min. : 0 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Length:51047 | Min. :0.00 | Min. :0.000 | Length:51047 | Length:51047 | Min. : 0.0 | Min. :0.00 | Length:51047 | Min. : 0.00 | Length:51047 | Length:51047 | 1-Highest: 8522 | Length:51047 | Other :37637 | Length:51047 | Min. :-29.5 | Min. : 0 |
| Q1 | 1st Qu.:3100632 | Yes:14711 | 1st Qu.: 34 | 1st Qu.: 158 | 1st Qu.: 30 | 1st Qu.: 0.0 | 1st Qu.: 0 | 1st Qu.: 0 | 1st Qu.: -83 | 1st Qu.: -7 | 1st Qu.: 0.7 | 1st Qu.: 0 | 1st Qu.: 5 | 1st Qu.: 0 | 1st Qu.: 0.0 | 1st Qu.: 8 | 1st Qu.: 3 | 1st Qu.: 0 | 1st Qu.: 23 | 1st Qu.: 11 | 1st Qu.: 2 | 1st Qu.: 0.0 | 1st Qu.: 0.0 | 1st Qu.:11.0 | 1st Qu.: 1.0 | 1st Qu.: 1.0 | Class :character | 1st Qu.: 1.00 | 1st Qu.: 1.00 | 1st Qu.: 205 | 1st Qu.: 0 | 1st Qu.: 0 | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | Class :character | 1st Qu.:0.00 | 1st Qu.:0.000 | Class :character | Class :character | 1st Qu.: 0.0 | 1st Qu.:0.00 | Class :character | 1st Qu.: 0.00 | Class :character | Class :character | 2-High :18993 | Class :character | Professional: 8755 | Class :character | 1st Qu.: 72.6 | 1st Qu.: 0 |
| Median | Median :3201534 | NA | Median : 48 | Median : 366 | Median : 45 | Median : 0.2 | Median : 3 | Median : 0 | Median : -5 | Median : 0 | Median : 3.0 | Median : 1 | Median : 16 | Median : 0 | Median : 0.0 | Median : 53 | Median : 14 | Median : 2 | Median : 62 | Median : 36 | Median : 5 | Median : 0.0 | Median : 0.3 | Median :16.0 | Median : 1.0 | Median : 1.0 | Mode :character | Median : 1.00 | Median : 1.00 | Median : 329 | Median :36 | Median : 0 | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Median :0.00 | Median :0.000 | Mode :character | Mode :character | Median : 0.0 | Median :5.00 | Mode :character | Median : 0.00 | Mode :character | Mode :character | 3-Good : 8410 | Mode :character | Crafts : 1519 | Mode :character | Median : 95.4 | Median : 1 |
| Mean | Mean :3201957 | NA | Mean : 59 | Mean : 526 | Mean : 47 | Mean : 0.9 | Mean : 40 | Mean : 1 | Mean : -12 | Mean : -1 | Mean : 6.0 | Mean : 4 | Mean : 28 | Mean : 2 | Mean : 0.3 | Mean : 115 | Mean : 25 | Mean : 8 | Mean : 91 | Mean : 68 | Mean : 10 | Mean : 0.0 | Mean : 1.8 | Mean :18.8 | Mean : 1.5 | Mean : 1.4 | NA | Mean : 1.81 | Mean : 1.56 | Mean : 381 | Mean :31 | Mean :21 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | Mean :0.04 | Mean :0.018 | NA | NA | Mean : 0.1 | Mean :4.32 | NA | Mean : 0.05 | NA | NA | 4-Medium : 5357 | NA | Clerical : 986 | NA | Mean : Inf | Mean : 6 |
| Q3 | 3rd Qu.:3305376 | NA | 3rd Qu.: 71 | 3rd Qu.: 723 | 3rd Qu.: 60 | 3rd Qu.: 1.0 | 3rd Qu.: 41 | 3rd Qu.: 0 | 3rd Qu.: 66 | 3rd Qu.: 2 | 3rd Qu.: 7.7 | 3rd Qu.: 4 | 3rd Qu.: 36 | 3rd Qu.: 2 | 3rd Qu.: 0.3 | 3rd Qu.: 154 | 3rd Qu.: 34 | 3rd Qu.: 9 | 3rd Qu.: 121 | 3rd Qu.: 89 | 3rd Qu.: 12 | 3rd Qu.: 0.0 | 3rd Qu.: 1.3 | 3rd Qu.:24.0 | 3rd Qu.: 2.0 | 3rd Qu.: 2.0 | NA | 3rd Qu.: 2.00 | 3rd Qu.: 2.00 | 3rd Qu.: 515 | 3rd Qu.:48 | 3rd Qu.:42 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 3rd Qu.:0.00 | 3rd Qu.:0.000 | NA | NA | 3rd Qu.: 0.0 | 3rd Qu.:7.00 | NA | 3rd Qu.: 0.00 | NA | NA | 5-Low : 6499 | NA | Self : 879 | NA | 3rd Qu.:109.3 | 3rd Qu.: 8 |
| Max | Max. :3399994 | NA | Max. :1223 | Max. :7359 | Max. :400 | Max. :159.4 | Max. :4321 | Max. :1112 | Max. : 5192 | Max. : 2484 | Max. :221.7 | Max. :384 | Max. :849 | Max. :327 | Max. :66.0 | Max. :2692 | Max. :644 | Max. :519 | Max. :2091 | Max. :1475 | Max. :412 | Max. :81.3 | Max. :212.7 | Max. :61.0 | Max. :196.0 | Max. :53.0 | NA | Max. :24.00 | Max. :15.00 | Max. :1812 | Max. :99 | Max. :99 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | Max. :4.00 | Max. :3.000 | NA | NA | Max. :35.0 | Max. :9.00 | NA | Max. :25.00 | NA | NA | 6-VeryLow: 1152 | NA | Retired : 733 | NA | Max. : Inf | Max. :100 |
| NA | NA | NA | NA’s :156 | NA’s :156 | NA’s :156 | NA’s :156 | NA’s :156 | NA’s :156 | NA’s :367 | NA’s :367 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA’s :1 | NA’s :1 | NA’s :1 | NA’s :909 | NA’s :909 | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | NA | 7-Lowest : 2114 | NA | (Other) : 538 | NA | NA’s :161 | NA’s :879 |
Box plot of the Monthly Minutes
# Horizontal, notched box plot of MonthlyMinutes across all customers.
boxplot(
  Telecom_Data$MonthlyMinutes,
  main = "Monthly Minutes of Customers",
  # With horizontal = TRUE the values run along the x axis, so that axis
  # carries the variable name. The other axis has no scale for a single box,
  # so the former "Frequency" label was misleading and has been dropped.
  xlab = "Monthly Minutes",
  col = "orange",
  border = "brown",
  horizontal = TRUE,
  notch = TRUE
)
## Current handset use in days
# Grouped box plot of CurrentEquipmentDays, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~CurrentEquipmentDays,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Current Equipment Days")
  )
Boxplot of Total Recurring Charge
# Grouped box plot of TotalRecurringCharge, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~TotalRecurringCharge,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Total Recurring Charge")
  )
Box plot of Months in Service
# Grouped box plot of MonthsInService, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~MonthsInService,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Months In Service")
  )
Box plot of the recurring charge as a percentage of monthly revenue
# Grouped box plot of perc_recurrent_charge, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~perc_recurrent_charge,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Recurring Charge (% of Monthly Revenue)")
  )
Box plot of Percent change in Minutes
# Grouped box plot of PercChangeMinutes, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~PercChangeMinutes,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Percent Change in Minutes")
  )
Box plot of Percent change in Revenues
# Grouped box plot of PercChangeRevenues, with one box per Churn level.
# Formula references (~column) are resolved against the data frame passed as
# the first argument, so the columns need not be re-extracted with `$`.
plot_ly(
  Telecom_Data,
  y = ~PercChangeRevenues,
  color = ~Churn,
  type = "box"
) %>%
  layout(
    boxmode = "group",
    xaxis = list(title = ""),
    # The y axis shows the variable's values, not a frequency.
    yaxis = list(title = "Percent Change in Revenues")
  )
Distribution of the Monthly Revenue
library(ggplot2)
library(plotly)
# Distribution of MonthlyRevenue: a density-scaled histogram with a kernel
# density curve and a rug, converted to an interactive plotly figure.
# NOTE(review): nothing in this block visibly draws random numbers, so the
# seed looks unnecessary; it is kept in case later code relies on it.
set.seed(1)
gg <- ggplot(Telecom_Data, aes(x = MonthlyRevenue, color = "density")) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 7,
    fill = "#67B7D1",
    alpha = 0.5
  ) +
  geom_density(color = "#67B7D1") +
  geom_rug(color = "#67B7D1") +
  ylab("") +
  xlab("") +
  theme(legend.title = element_blank()) +
  scale_color_manual(values = c("density" = "#67B7D1"))
ggplotly(gg) %>%
  layout(
    plot_bgcolor = "#e5ecf6",
    # The x axis carries MonthlyRevenue (it was previously titled "Time").
    xaxis = list(
      title = "Monthly Revenue",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      # Written in the same '#ffff' form as zerolinecolor for consistency.
      gridcolor = "#ffff"
    ),
    # The y axis carries the density (it was previously titled
    # "Monthly Revenue").
    yaxis = list(
      title = "Density",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      gridcolor = "#ffff"
    )
  )
Distribution of Monthly Minutes
library(ggplot2)
library(plotly)
# Distribution of MonthlyMinutes: a density-scaled histogram with a kernel
# density curve and a rug, converted to an interactive plotly figure.
# NOTE(review): nothing in this block visibly draws random numbers, so the
# seed looks unnecessary; it is kept in case later code relies on it.
set.seed(1)
gg <- ggplot(Telecom_Data, aes(x = MonthlyMinutes, color = "density")) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 7,
    fill = "#67B7D1",
    alpha = 0.5
  ) +
  geom_density(color = "#67B7D1") +
  geom_rug(color = "#67B7D1") +
  ylab("") +
  xlab("") +
  theme(legend.title = element_blank()) +
  scale_color_manual(values = c("density" = "#67B7D1"))
ggplotly(gg) %>%
  layout(
    plot_bgcolor = "#e5ecf6",
    xaxis = list(
      title = "Monthly Minutes ",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      # Written in the same '#ffff' form as zerolinecolor for consistency.
      gridcolor = "#ffff"
    ),
    # The histogram is scaled to density, so the y axis is a density rather
    # than a frequency.
    yaxis = list(
      title = "Density",
      zerolinecolor = "#ffff",
      zerolinewidth = 2,
      gridcolor = "#ffff"
    )
  )
# Normal Q-Q plot of MonthlyMinutes with a red reference line.
with(Telecom_Data, {
  qqnorm(MonthlyMinutes)
  qqline(MonthlyMinutes, col = "red")
})
#install.packages("car")
#library("car")
#qqPlot(Telecom_Data$MonthlyMinutes)
library("plotly")
#plot_ly(Telecom_Data, y= Telecom_Data$AgeHH1, color = Telecom_Data$Churn, type = "box")
#layout(boxmode = "group",
# xaxis = list(title=''),
# yaxis = list(title='Frequency'))
itgi